package core_sql.day06_sql

import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
  * SQL-style word count: reads a text file, splits lines into words,
  * and counts occurrences via a Spark SQL query over a temp view.
  */
object SqlWordCount {

  /**
    * Entry point. Expects one argument: the input path to read text from.
    *
    * Counts word occurrences with a Spark SQL query over a temporary view
    * and prints the result (descending by count) to stdout.
    */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usable message instead of an ArrayIndexOutOfBoundsException
    // when the input path argument is missing.
    require(args.nonEmpty, s"Usage: ${this.getClass.getSimpleName.stripSuffix("$")} <input-path>")

    // In Spark 2.0+, SparkSession is the single entry point for
    // Dataset / DataFrame / SQL APIs.
    val session: SparkSession = SparkSession.builder()
      .appName("SQLWordCount")
      .master("local[*]")
      .getOrCreate()

    // Ensure the session is released even if the job throws.
    try {
      // Read the input as a Dataset[String], one element per line.
      val lines: Dataset[String] = session.read.textFile(args(0))

      // Implicit encoders needed for Dataset transformations on String.
      import session.implicits._
      val words: Dataset[String] = lines.flatMap(_.split(" "))

      // Register a temporary view (conceptually a table) for SQL access.
      // createOrReplaceTempView avoids AnalysisException if the view exists.
      words.createOrReplaceTempView("v_words")

      // Dataset[String] exposes its single column as "value"; alias it to
      // "word" and aggregate. Spark SQL permits GROUP BY / ORDER BY aliases.
      val r: DataFrame = session.sql(
        "select value word,count(*) counts from v_words GROUP BY word ORDER BY counts DESC")

      r.show()
    } finally {
      session.stop()
    }
  }

}
